*Write a permanent log file;
PROC PRINTTO LOG="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P01 ED Code up Visits.log" NEW;
RUN;

*Write a permanent list file;
PROC PRINTTO PRINT="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P01 ED Code up Visits.lst" NEW;
RUN;

/*
This program combines the state by year files used for the analysis of Emergency Department Visits

NOTES
1. The inpatient data in NJ is dropped because with the variables available on the file it is not possible compute exact age
2. NJ and IA appear to be imputing month of birth for some observations (or masking) - it is not clear from the 
   documentation or the month profile how the imputation (or masking) is being done
3. CA is masking month of admission and gender for some observations - as noted below this 
   is fixable for the ED data
4. Gender is missing for 168,076 out of 8,097,331 observations due almost entirely to
   to California masking gender due to confidentiality reasons
5. There is no charge variable in California ED data
6. Gender is suppressed in about 5 percent of CA ED obs and 9 percent of CA inpatient obs
7. The last quarter of WI SID in 2007 is missing from the public use file
8. Recodes all look stable and similar across states
*/

OPTIONS COMPRESS = YES OBS = MAX;

libname inter "D:\Data\inter";

*Libnames for ED data;
libname AZ_ED "D:\Data\Emergency Department\ED Arizona (2005-2009)";
libname CA_ED "D:\Data\Emergency Department\ED California (2005-2010)";
libname IA_ED "D:\Data\Emergency Department\ED Iowa (2004-2009)";
libname NJ_ED "D:\Data\Emergency Department\ED New Jersey (2004-2010)";
libname WI_ED "D:\Data\Emergency Department\ED Wisconsin (2004-2010)";

*Libnames for Inpatient data;
libname AZ_INP "D:\Data\Hospital Inpatient Records\Hospital Arizona (1990-2009)";
libname CA_INP "D:\Data\Hospital Inpatient Records\Hospital California HCUP (2003-2010)";
libname IA_INP "D:\Data\Hospital Inpatient Records\Hospital Iowa (2004-2009)";
libname NJ_INP "D:\Data\Hospital Inpatient Records\Hospital New Jersey (1990-2010)";
libname WI_INP "D:\Data\Hospital Inpatient Records\Hospital Wisconsin (2004-2010)";

*Variables to keep;
%let keep_v1 = bmonth byear amonth ayear ECODE1 DX1 FEMALE DISPUNIFORM PAY1 pay1;

%macro pull_s(lib,file,keep_v2,keep_v3,keep_v4,keep_v5,dx_max);
data &file. (drop = dx2-dx&dx_max.);
   format state $2.;
   set &lib..&file. (keep = &keep_v1. &keep_v2. &keep_v3. &keep_v4. &keep_v5. dx2-dx&dx_max.);
  *Record the file name and state postal code;
   file = "&file.";
   state = substr("&lib.",1,2);
  *Code up file source;
   if substr("&lib.",4,2) = 'IN' then inpatient = 1; else inpatient = 0; 
  *In CA admission month is set to missing for a fraction of observations but in the ED data the file still includes 
   the discharge month. The variable DMONTH in the ED files is always the same as the admission month and is never supressed;
   if "&lib." eq "CA_ED" and AMONTH = . then AMONTH = DMONTH;  
   *Compute age in months relative to age 21;
   months_21 = intck('month',mdy(bmonth,14,byear+21),mdy(amonth,15,ayear));
   if months_21 = . then bad_date = 1; else bad_date = 0;
  *Flag for ED admission on inpatient in some states they switch from one source coding to 
   another in 2007. In 2007 the rates dip in those states suggesting a slippage in coding is occurring;
   if inpatient = 1 and (&keep_v2. = "1" or &keep_v3. = "7") then from_ED = 1; else from_ED = 0;
  *Loop over ICD looking for alcohol mention start with second ICD code;
   array icd{&dx_max.} DX1-DX&dx_max.;
   alcohol_mention = 0;
   alcohol_mention_oth = 0;
   i = 2; *Start with second ICD code;
   do until (i > &dx_max.);
      if substr(icd{i},1,3) in ('291','303') or substr(icd{i},1,4) = '3050' then alcohol_mention = 1; 
	 *These are other ICD codes that include alcohol;
      if substr(icd{i},1,4) in ("3575", "4255", "5353", "5710", "5711", "5712", "5713", "7903", "9800", "V113", "V791" )  then alcohol_mention_oth = 1; 
      if icd{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;  
run;
%mend;

%pull_s(AZ_ED,az_seddc_2005_core,byear,byear,ECODE1-ECODE5,TOTCHG,9);*Include byear as a place keeper here;
%pull_s(AZ_ED,az_seddc_2006_core,byear,byear,ECODE1-ECODE5,TOTCHG,9);
%pull_s(AZ_ED,az_seddc_2007_core,byear,byear,ECODE1-ECODE5,TOTCHG,9);
%pull_s(AZ_ED,az_seddc_2008_core,byear,byear,ECODE1-ECODE6,DX_Visit_Reason1 TOTCHG,25);
%pull_s(AZ_ED,az_seddc_2009_core,byear,byear,ECODE1-ECODE6,DX_Visit_Reason1 TOTCHG,25);

%pull_s(CA_ED,ca_seddc_2005_core,byear,dmonth,ECODE1-ECODE5,,25);
%pull_s(CA_ED,ca_seddc_2006_core,byear,dmonth,ECODE1-ECODE5,,25);
%pull_s(CA_ED,ca_seddc_2007_core,byear,dmonth,ECODE1-ECODE5,,25);
*%pull_s(CA_ED,ca_seddc_2008_core);*No birth year on this file or the 2009 and 2010 files;

%pull_s(IA_ED,ia_seddc_2004_core,byear,byear,ECODE1-ECODE6,TOTCHG,8);
%pull_s(IA_ED,ia_seddc_2005_core,byear,byear,ECODE1-ECODE5,TOTCHG,8);
%pull_s(IA_ED,ia_seddc_2006_core,byear,byear,ECODE1-ECODE5,TOTCHG,8);
%pull_s(IA_ED,ia_seddc_2007_core,byear,byear,ECODE1-ECODE8,DX_Visit_Reason1 TOTCHG,30);
%pull_s(IA_ED,ia_seddc_2008_core,byear,byear,ECODE1-ECODE8,DX_Visit_Reason1 TOTCHG,30);
%pull_s(IA_ED,ia_seddc_2009_core,byear,byear,ECODE1-ECODE7,DX_Visit_Reason1 TOTCHG,32);

%pull_s(NJ_ED,nj_seddc_2004_core,byear,byear,ECODE1-ECODE5,TOTCHG,9);
%pull_s(NJ_ED,nj_seddc_2005_core,byear,byear,ECODE1-ECODE5,TOTCHG,9);
%pull_s(NJ_ED,nj_seddc_2006_core,byear,byear,ECODE1-ECODE6,TOTCHG,9);
%pull_s(NJ_ED,nj_seddc_2007_core,byear,byear,ECODE1-ECODE7,DX_Visit_Reason1 TOTCHG,9);
%pull_s(NJ_ED,nj_seddc_2008_core,byear,byear,ECODE1-ECODE7,DX_Visit_Reason1 TOTCHG,24);
%pull_s(NJ_ED,nj_seddc_2009_core,byear,byear,ECODE1-ECODE8,DX_Visit_Reason1 TOTCHG,24);
%pull_s(NJ_ED,nj_seddc_2010_core,byear,byear,ECODE1-ECODE8,DX_Visit_Reason1 TOTCHG,24); 

%pull_s(WI_ED,wi_seddc_2004_core,byear,byear,ECODE1-ECODE6,TOTCHG,9);
%pull_s(WI_ED,wi_seddc_2005_core,byear,byear,ECODE1-ECODE6,TOTCHG,9);
%pull_s(WI_ED,wi_seddc_2006_core,byear,byear,ECODE1-ECODE10,TOTCHG,30);
%pull_s(WI_ED,wi_seddc_2007_core,byear,byear,ECODE1-ECODE12,DX_Visit_Reason1 TOTCHG,30);
%pull_s(WI_ED,wi_seddc_2008_core,byear,byear,ECODE1-ECODE9,DX_Visit_Reason1 TOTCHG,30);
%pull_s(WI_ED,wi_seddc_2009_core,byear,byear,ECODE1-ECODE9,DX_Visit_Reason1 TOTCHG,30);
%pull_s(WI_ED,wi_seddc_2010_core,byear,byear,ECODE1-ECODE9,DX_Visit_Reason1 TOTCHG,30); 

*Combine the files;
data inter.rep_alcohol (drop = from_ED  bmonth byear amonth ayear bad_date );
   set                    az_seddc_2005_core az_seddc_2006_core az_seddc_2007_core az_seddc_2008_core az_seddc_2009_core 
                          ca_seddc_2005_core ca_seddc_2006_core ca_seddc_2007_core
       ia_seddc_2004_core ia_seddc_2005_core ia_seddc_2006_core ia_seddc_2007_core ia_seddc_2008_core ia_seddc_2009_core  
       nj_seddc_2004_core nj_seddc_2005_core nj_seddc_2006_core nj_seddc_2007_core nj_seddc_2008_core nj_seddc_2009_core nj_seddc_2010_core
       wi_seddc_2004_core wi_seddc_2005_core wi_seddc_2006_core wi_seddc_2007_core wi_seddc_2008_core wi_seddc_2009_core wi_seddc_2010_core;
	format ad_date date.;
   *Drop bad dates there are 62,180 obs dropped for this reason and they are from all ages so the
	proportion of people with missing admission is (93318036-93255856)/93318036 =  0.0006663235;
	if bad_date = 1 then delete;
   *Restrict to visits within 40 months of the month in which the birthday falls leaves 9,844,100 admissions in this age range;
	if months_21 < -40 or months_21 > 39 then delete;
   *Drop the people that are coded as transferred from the ER to the hospital because they 
	should appear in the inpatient dataset. This is a total of 66,403 observations (0.79 percent of total ED visits) leaves 9,777,697 records;
   if inpatient = 0 and DISPUNIFORM = 2 then trans_to_hosp = 1; else trans_to_hosp = 0; 
    ad_date = mdy(amonth,15,ayear);
	if state in ('AZ','CA') and ad_date < mdy(1,1,2005) then delete;
	if state in ('IA','WI') and ad_date < mdy(1,1,2004) then delete;
   *Order Intoxication, injury (other, self, accident), mental illness and everything else is illness;
   *For this do both primary diagnosis (first diagnosis DX1 in all but a few states in time where they also have recoded visit reason)
	using visit reasons only adds a few percent more alcohol codes most times it is the same as DX1;
	alcohol = 0;
    if substr(DX1,1,3) in ('291','303') or substr(DX1,1,4) = '3050' then alcohol = 1; 
    if substr(DX_Visit_Reason1,1,3) in ('291','303') or substr(DX_Visit_Reason1,1,4) = '3050' then alcohol = 1; 
   *Overall injury category for regressions;
	if ECODE1 ne '' then injury = 1; else injury = 0;
   *Source and location of injury;
   array ECOD{12} ECODE1-ECODE12;
   inj_by_self_e = 0;
   inj_by_oth_e = 0;
   place = "       ";
  *Code up alcohol or drugs any mention;
   i = 1;
   do until (i > 12);
     *There are very few people that are coded as both (175) and going over all the ecodes only adds a few percent to the number coded; 
      if substr(ECOD{i},2,2) = '95' then inj_by_self_e = 1; 
      if substr(ECOD{i},2,2) = '96' then inj_by_oth_e = 1;  
	 *Place often coded for assaults;
      if substr(ECOD{i},2,3) = '849' then place = ECOD{i};
      if substr(ECOD{i},2,4) in ('8600','8601') then alcohol_mention_oth = 1; 
      if ECOD{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;
  *Create mutually exclusive categories precedence - alcohol,;
    if alcohol = 0 and inj_by_oth_e = 1 then inj_by_oth  = 1; else inj_by_oth = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 1 then inj_by_self  = 1; else inj_by_self = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 1 then inj_accident  = 1; else inj_accident = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 1 then alcohol_on_dx = 1; else alcohol_on_dx = 0; *Alcohol not marked as primary cause but mentioned;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 0 then illness = 1; else illness = 0; *Residual category;
  *Create combined categories;
   alcohol_any = alcohol + alcohol_on_dx;
   injury_or_alc =  inj_by_oth + inj_by_self + inj_accident + alcohol + alcohol_on_dx ;
  *Make sure the list is exhaustive;
	visit = alcohol + inj_by_self + inj_by_oth + inj_accident + alcohol_on_dx + illness;
  *Code up discharge; 
   if DISPUNIFORM = 20 then died = 1; else died = 0;
  *Flag for pregnant;
   preg_related = 0;
   if substr(DX1,1,2) in ('63','64','65','66','67') then preg_related = 1; 
   if substr(DX_Visit_Reason1,1,2) in ('63','64','65','66','67') then preg_related = 1; 
  *Flag for insurance;
   if pay1 = 2	then Medicaid = 1; else Medicaid = 0;
   if pay1 = 3	then Private = 1; else Private = 0;
   if pay1 = 4	then Self_pay = 1; else Self_pay = 0;
   if pay1 not in (2,3,4) then other_ins = 1; else other_ins = 0;
   file_short = substr(file,1,5);
   if TOTCHG = . then miss_chg = 1; else miss_chg = 0; 
   if female = . then miss_female = 1; else miss_female = 0; 
run;

*Direct list file back to lst window;
PROC PRINTTO PRINT=PRINT;
RUN; 

*Direct log file back to log window;
PROC PRINTTO LOG=LOG;
RUN; 
